// LEC		March 26, 2021
//
// STATE SIZE PROJECT
// Additional Prep File for CShapes-specific Supplementary Data





//////////////////////////////////////////////////////////////////////////////////
// Prepare external state age data   (not essential for main results)
//
// Input : epr_segment_level_dataset.csv, read from $ROOT
// Output: agedata.dta, written to $OUTDIR, with variables id, year, state_age
//////////////////////////////////////////////////////////////////////////////////
// The directory macros are quoted so that paths containing spaces still resolve.
cd "$ROOT"
insheet using "epr_segment_level_dataset.csv", clear
keep countries_gwid year state_age
// The analysis data identifies states by "id", so adopt that name here.
rename countries_gwid id
// Reduce to one row per id-year, taking the largest state_age whenever several
// input rows share the same pair.
collapse (max) state_age, by(id year)
// Store the file ordered by its merge key.
sort id year
cd "$OUTDIR"
save agedata.dta, replace


//////////////////////////////////////////////////////////////////////////////////
// Prepare democracy data   (less optimal version, can be dropped)
//
// Input : p4v2018.csv, read from $ROOT
// Output: p4v.dta, written to $OUTDIR, with variables countries_gwid, year,
//         country, polity2
//////////////////////////////////////////////////////////////////////////////////

// The directory macros are quoted so that paths containing spaces still resolve.
cd "$ROOT"
insheet using "p4v2018.csv", clear
// Rename the country code to the key name used when merging into the analysis
// data. NOTE(review): ccode is used as if it were a GW id here; the merge
// section of this file warns that a GW-COW mismatch is possible.
rename ccode countries_gwid
keep countries_gwid year country polity2
// Store the file ordered by its merge key.
sort countries_gwid year
cd "$OUTDIR"
save p4v, replace


//////////////////////////////////////////////////////////////////////////////////
// Prepare democracy data (GW compatible with data from Kristian)
//
// Input : ksgp4use.asc (space-delimited), read from $ROOT
// Output: ksgp4use.dta, written to $OUTDIR, with variables countries_gwid, year,
//         polity
//////////////////////////////////////////////////////////////////////////////////

// The directory macros are quoted so that paths containing spaces still resolve.
cd "$ROOT"
insheet using "ksgp4use.asc", clear delimiter(" ")
rename ccode countries_gwid
keep countries_gwid year polity
// Discard rows whose score lies below -10. Stata treats missing as larger than
// any number, so rows with a missing polity are NOT removed by this condition.
// NOTE(review): presumably this filters special codes outside the regular
// score range -- confirm against the data documentation.
drop if polity < -10
// Store the file ordered by its merge key.
sort countries_gwid year
cd "$OUTDIR"
save ksgp4use, replace


//////////////////////////////////////////////////////////////////////////////////
// Prepare trade data (with data from VDem)
//
// Input : trade_data_lec.csv, read from $ROOT
// Output: tradedata.dta, written to $OUTDIR, with variables countries_gwid, year,
//         tradeopenness
//////////////////////////////////////////////////////////////////////////////////

// The directory macros are quoted so that paths containing spaces still resolve.
cd "$ROOT"
insheet using "trade_data_lec.csv", clear

// Both columns below are handled as strings in which the literal text "NA"
// marks a missing value. "NA" is recoded to "." so that destring converts it
// to numeric missing.
// NOTE(review): this relies on insheet having read the columns as strings; if a
// future version of the file contains no "NA" entries the columns would arrive
// numeric and the replace statements would stop with a type mismatch.
replace gwcode = "." if gwcode=="NA"
destring gwcode, gen(countries_gwid)
replace trade_to_gdp_real_us = "." if trade_to_gdp_real_us=="NA"
destring trade_to_gdp_real_us, gen(tradeopenness)

keep countries_gwid year tradeopenness
// Rows without a complete merge key cannot be matched later, so remove them.
drop if countries_gwid == . | year == .
// Store the file ordered by its merge key.
sort countries_gwid year
cd "$OUTDIR"
save tradedata, replace


//////////////////////////////////////////////////////////////////////////////////


// Load the main analysis data and attach the supplementary data sets prepared
// in the sections above. The directory macros are quoted so that paths
// containing spaces still resolve.
// NOTE: the end of this file saves the result back to analysis_data.dta, i.e.
// over the file loaded here.
cd "$OUTDIR"
use analysis_data.dta, clear

// Merge in external age data.
// Rows lacking a complete id-year key are removed before the merge, so that
// they cannot violate the uniqueness check of merge 1:1, and again afterwards,
// for any such rows contributed by agedata. Unmatched rows from agedata that
// do have a complete key are kept.
drop if id == . | year == .
merge 1:1 id year using agedata, nogenerate
drop if id == . | year == .


// Merge in regional dummies (Note: GW-COW mismatch possible)
// rdummies_2 is not created by this file and is read from $ROOT.
// m:1 attaches the single rdummies_2 row per country to every year of that
// country; Stata stops with an error if rdummies_2 holds duplicate
// countries_gwid values instead of combining them silently.
gen countries_gwid = id
cd "$ROOT"
merge m:1 countries_gwid using rdummies_2, nogenerate
drop if id == .

// The remaining files were all written to $OUTDIR by the preparation sections
// above, so they are merged from there. None of them contains "id", so
// keep(master match) discards exactly the using-only rows that would otherwise
// end up with a missing id.
cd "$OUTDIR"

// Merge in first democracy dataset (Note: GW-COW mismatch possible)
sort countries_gwid year
merge 1:1 countries_gwid year using p4v, keep(master match) nogenerate

// Merge in Kristian's GW-optimized democracy data (Polity)
sort countries_gwid year
merge 1:1 countries_gwid year using ksgp4use, keep(master match) nogenerate

// Merge with trade data
sort countries_gwid year
merge 1:1 countries_gwid year using tradedata, keep(master match) nogenerate

// Declare the panel structure; the lag operator used below depends on it.
xtset id year

// Democracy indicator from Kristian's GW data: 1 when polity is 6 or higher,
// 0 when it is lower, missing when polity is missing.
gen dem = (polity >= 6) if polity != .

// Same coding applied to the alternative democracy data (polity2); not used.
gen dem2 = (polity2 >= 6) if polity2 != .


// Alternative state age counter based on external data (beyond sample):
// take state_age where it is available and fall back to age otherwise.
gen stateage = cond(state_age == ., age, state_age)

// Log of state age, shifted by one.
gen lstateage = ln(stateage + 1)

// Log of trade openness and its one-period lag within each id.
gen lntrade = ln(tradeopenness)
gen lltrade = L.lntrade

cd $OUTDIR
save analysis_data.dta, replace
